#!/usr/bin/env bash
# mindbender-produce-reports -- produce all reports by instantiating what's enumerated in the dashboard configuration
# Usage:
# > mindbender produce-reports SNAPSHOT_DIR SNAPSHOT_CONFIG
#
# Author: Jaeho Shin <netj@cs.stanford.edu>
# Created: 2015-02-08
# Abort on any unhandled command failure (-e) and on use of unset variables (-u).
set -eu

# First positional argument: the snapshot directory in which reports are produced.
# (`usage` is a helper defined outside this file.)
[[ $# -gt 0 ]] || usage "$0" "Missing SNAPSHOT_DIR"
SnapshotDir=$1; shift

# Second positional argument: the snapshot configuration file to run.
[[ $# -gt 0 ]] || usage "$0" "Missing SNAPSHOT_CONFIG"
SnapshotConfiguration=$1; shift

# find path to the DeepDive app
# NOTE(review): find-deepdive-app is an external helper called without arguments;
# presumably it searches from the current working directory -- confirm.
DEEPDIVE_APP=$(find-deepdive-app)
export DEEPDIVE_APP

# Directories searched for report templates, in import order: create-report-instance
# copies from each one in turn, so files from the app's templates are copied
# after (on top of) the ones shipped under $MINDBENDER_HOME.
reportTemplateImportOrder=(
"$MINDBENDER_HOME"/etc/snapshot-template
"$DEEPDIVE_APP"/snapshot-template
)

# normalize some path names (drop a single trailing slash)
SnapshotDir=${SnapshotDir%/}

# make sure the given snapshot configuration exists, then record it in the snapshot:
# snapshot.last.conf is overwritten with a timestamp line plus the configuration,
# and the same text is appended to snapshot.conf
[[ -e "$SnapshotConfiguration" ]] || error "$SnapshotConfiguration: SNAPSHOT_CONFIG does not exist"
{
    echo "# snapshot at $(date +%FT%T)"
    cat "$SnapshotConfiguration"
} | tee "$SnapshotDir"/snapshot.last.conf >>"$SnapshotDir"/snapshot.conf
# path of the configuration to run, relative to the snapshot directory
snapshotConfigurationPath="snapshot.last.conf"

# move into the given snapshot; everything below uses paths relative to it
cd "$SnapshotDir" || error "$SnapshotDir: No such snapshot"
[[ -r "$snapshotConfigurationPath" ]] || error "No snapshot configuration to run"

# TODO factor out these shell functions into separate scripts, and add unit tests

## some utility commands
# Escapes the characters &, < and " in every argument so the text can be
# embedded in a double-quoted HTML attribute value.
# Arguments: any number of strings
# Outputs:   the escaped arguments on stdout, written back to back with no
#            separator and no trailing newline
quote-html-attr() {
    local s=
    # The replacement texts live in variables that are expanded inside quotes:
    # since bash 5.2 (shopt patsub_replacement, on by default) an unquoted `&`
    # in a pattern-substitution replacement stands for the matched text, which
    # would turn `<` into `<lt;` instead of `&lt;`.
    local amp='&amp;' lt='&lt;' quot='&quot;'
    for s; do
        # `&` must be escaped first so the entities added below stay intact
        s=${s//&/"$amp"}
        s=${s//</"$lt"}
        s=${s//\"/"$quot"}
        # printf, unlike `echo -n`, never mistakes a value such as -n or -e for an option
        printf '%s' "$s"
    done
}
# Prints the first name that a caller-supplied test accepts: the bare prefix
# if it is accepted, otherwise prefix + separator + serial for increasing serials.
# Arguments: $1 - prefix string
#            $2 - command called with a candidate name; succeeds when the name is acceptable
#            $3 - separator between prefix and serial (default: -)
#            $4 - first serial number to try (default: 2)
assign-unique-serial() {
    local base=$1 isAcceptable=$2
    local sep=${3:-'-'} n=${4:-2}
    if "$isAcceptable" "$base"; then
        echo "$base"
        return
    fi
    # the bare prefix is rejected: count upwards until a candidate is accepted
    until "$isAcceptable" "$base$sep$n"; do
        (( ++n )) || true
    done
    echo "$base$sep$n"
}

## commands for snapshot-template and instance handling
# TODO factor out as a standalone command
# Lists the report directories inside the given instance directory, one path
# per line relative to the instance (`.` for the instance itself), each path
# printed only once.
# Arguments: $1 - path to the instance directory
# Ordering:  when a non-empty reports.order file exists in the instance, its
#            lines are glob patterns; matches of the patterns before a line
#            consisting of a single `*` come first, matches of the patterns
#            after that line come last, and everything else goes in between.
#            Without such a file, reports are listed in find/sort order.
list-all-reports() {
    local instance=$1; shift
    # find all report.sh and README.md.in files in the given instance and output their dirnames
    (
        cd "$instance"
        list-reports() {
            # consider a directory as a report if any of these satisfy:
            # - a report.sh file exists
            # - a README.md.in file exists
            # NOTE(review): the find below matches by name only; it does not
            # check whether report.sh is executable.
            # Each argument is searched separately and its matches are sorted;
            # find errors (e.g. for unmatched glob patterns) are discarded.
            local p=
            for p; do find "$p" \
                \( -name report.sh \
                -o -name README.md.in \
                \) 2>/dev/null | sort || true
            done
            # TODO exclude task templates
        }
        # TODO use list-all-reports recursively to support nested reports.order
        if [[ -s reports.order ]]; then
            # if the order is specified, list paths carefully
            # (scratch directory holding what was listed by the top and bottom halves)
            local listed=$(mktemp -d "${TMPDIR:-/tmp}"/order-by.XXXXXXX)
            {
                # first, print the top half: the patterns before the `*` line
                # (or the whole file when there is no `*` line); each line is
                # left unquoted on purpose so that it is glob-expanded
                sed '/^\*$/,$d' <reports.order |
                (
                    set --; while read globPattern; do set -- "$@" $globPattern; done
                    [[ $# -eq 0 ]] || list-reports "$@"
                ) | tee "$listed"/either
                # next, get the bottom half: the patterns after the `*` line; a blank
                # line is prepended so that a `*` on the file's first line still ends
                # the deleted range. The result is only saved, not printed yet.
                { echo; cat reports.order; } | sed '1,/^\*$/d' |
                (
                    set --; while read globPattern; do set -- "$@" $globPattern; done
                    [[ $# -eq 0 ]] || list-reports "$@"
                ) | tee -a "$listed"/either >"$listed"/bottom
                # print the rest in the middle excluding those either in top or bottom
                sort -r <"$listed"/either >"$listed"/either-reversed  # XXX workaround for FreeBSD grep
                list-reports * | grep -vxFf "$listed"/either-reversed || true
                # finally, print the bottom
                cat "$listed"/bottom
            }
            rm -rf "$listed"
        else
            # otherwise, just enumerate reports
            list-reports *
        fi |
        # turn the matched file paths into the directories containing them
        while read file; do dirname "$file"; done |
        awk '!seen[$0]++ {print}'  # only show the first occurrences
    )
}
# Instantiates the report template named $1 under the directory prefix $2.
# Arguments: $1 - template name, looked up under every directory listed in
#                 reportTemplateImportOrder
#            $2 - prefix (ending in /) under which the instance is created
# Outputs:   path of the new instance directory on stdout
# Returns:   non-zero when no template of that name exists. The status is
#            returned explicitly because callers capture the output with
#            $(...), where bash clears errexit, so a failing `error` alone
#            would not stop this function.
create-report-instance() {
    local name=$1; shift
    local reportsPrefix=$1; shift
    # create a directory for the report instance; a serial number is appended
    # when a file or directory of that name is already there
    local instance
    instance=$(
        no-instance-already-there() { ! [[ -e "$1" ]]; }
        assign-unique-serial "$reportsPrefix$name" no-instance-already-there
    ) || return
    mkdir -p "$instance" || return
    # import all report templates found by $name
    local dir= numImports=0
    for dir in "${reportTemplateImportOrder[@]}"; do
        local template="$dir/$name"
        [[ -d "$template" ]] || continue
        # clone all template files into the instance
        rsync -a --copy-unsafe-links "$template"/ "$instance"/ --exclude=reports.order
        # merge reports.order
        ! [[ -s "$template"/reports.order ]] ||
            merge-order-specs "$instance"/reports.order "$template"/reports.order
        numImports=$((numImports + 1))
    done
    if [[ $numImports -eq 0 ]]; then
        # nothing was imported: drop the empty directory created above (best
        # effort) so it does not take up a name, then report the failure
        rmdir "$instance" 2>/dev/null || true
        error "$name: No such report template" || true
        return 1
    fi
    # make sure the instance includes report.params of all parents
    local p= params=
    for dir in "${reportTemplateImportOrder[@]}"; do
        list-breadcrumb-paths "$name" |
        sed '$d' | # exclude the last line that is $name itself (portable form of GNU `head -n -1`)
        while read -r p; do
            params="$dir/$p"/report.params
            [[ -e "$params" ]] || continue
            cp -f "$params" "$reportsPrefix$p"/
        done
    done
    ensure-nested-reports-inherits-params "$instance"
    echo "$instance"
}
# Writes a .report.params file into every report found in the given instance
# (as enumerated by list-all-reports). The file is the concatenation of the
# report.params files found along the paths printed by list-breadcrumb-paths
# for that report; missing report.params files are skipped silently.
# Arguments: $1 - path to the report instance
ensure-nested-reports-inherits-params() {
    local instance=$1 nested= ancestor= target=
    shift
    list-all-reports "$instance" |
    while read nested; do
        # `.` stands for the instance itself, anything else is relative to it
        if [[ $nested = . ]]; then
            target=$instance
        else
            target=$instance/$nested
        fi
        list-breadcrumb-paths "$target" |
        while read ancestor; do
            cat "$ancestor"/report.params 2>/dev/null || true
        done >"$target"/.report.params
    done
}
# Prepares the parameters of a task instantiated under a report.
# Arguments: $1 - id of the report the task is run for
#            $2 - name of that report
#            $3 - path to that report
#            $4 - name of the task
#            $5 - path to the task instance
# Effects:   sources $5/task.params in a subshell with the words `scope`,
#            `required` and `optional` defined, appending the required/optional
#            declarations to $5/report.params; refreshes the inherited
#            .report.params files of the task; then starts $5/.report.params.sh
#            with a copy of the report's .report.params.sh.
compile-task-params() {
    local reportId=$1 reportName=$2 reportPath=$3 taskName=$4 taskPath=$5
    shift 5

    # -- compile task.params into report.params --
    # `scope report NAME...` accepts the task only if the report is one of the
    # given names (matched as patterns) or lies below one of them
    scope() {
        local scopeType=$1; shift
        [[ $scopeType = report ]] || error "$scopeType: Invalid scope for task $taskPath"
        local candidate=
        for candidate in "$@"; do
            if [[ $reportName = $candidate || $reportName = $candidate/* ]]; then
                return 0
            fi
        done
        error "report $reportId is out of scope for task $taskName"
    }
    # parameter declarations are carried over verbatim to the task's report.params
    required() { escape-args-for-shell required "$@" >>"$taskPath"/report.params; }
    optional() { escape-args-for-shell optional "$@" >>"$taskPath"/report.params; }
    ( source "$taskPath"/task.params )
    ensure-nested-reports-inherits-params "$taskPath"

    # -- make sure the task inherits parameter bindings from its scope report --
    {
        printf '%s\n' "## parameters from report $reportPath"
        cat "$reportPath"/.report.params.sh
        printf '\n%s\n' "## parameters for task $taskPath"
    } >"$taskPath"/.report.params.sh
}
# Records the parameter bindings of a report instance inside its directory.
# Arguments: $1 - name of the report
#            $2 - path to the report instance
#            $3... - named parameters given as NAME=VALUE
# Effects (all inside $2):
#   .report.params.sh        appended with an export line per given parameter,
#                            plus checks/defaults derived from .report.params
#   .report.params.required  names of the required parameters (rewritten)
#   .report.params.optional  names of the optional parameters (rewritten)
#   report.params.json       regenerated through the external report-values command
#   .report.id               id produced by generate-report-id
# The .report.params* files and .report.id are made read-only at the end.
record-report-params() {
    local reportName=$1; shift
    local reportPath=$1; shift
    (
        cd "$reportPath"
        # generate .report.params.sh (recording given parameters in shell script) based on report.params specification
        {
            # produce a line for each parameter passed
            echo "# named parameters given"
            local namedParam=
            for namedParam in "$@"; do
                local name=${namedParam%%=*}
                local value=${namedParam#*=}
                escape-args-for-shell export "$name=$value"
            done
            echo
        } >>.report.params.sh
        :  >.report.params.required
        :  >.report.params.optional

        # interpret the report.params specification
        if [[ -e .report.params ]]; then
            # vocabularies allowed in the report.params file, to generate shell script lines
            # required NAME[:TYPE] DESCRIPTION
            required() {
                local nametype=$1; shift
                local desc=$1; shift
                # parse optional type from name
                local name= type=
                name=${nametype%:*}
                type=${nametype#$name}
                type=${type#:}
                echo >>.report.params.sh ": \${$name:?Missing parameter${type:+ of type $type}: $desc} # required"
                echo >>.report.params.required "$name"
            }
            # optional NAME[:TYPE] DEFAULT DESCRIPTION
            optional() {
                local nametype=$1; shift
                local valueDefault=$1; shift
                local desc=$1; shift
                # parse optional type from name
                local name= type=
                name=${nametype%:*}
                type=${nametype#$name}
                type=${type#:}
                echo >>.report.params.sh ": \${$name:=$valueDefault}  # $desc"
                echo >>.report.params.optional "$name"
            }
            ( source .report.params )
            # export every declared parameter; $params is left unquoted on
            # purpose so that the names end up on a single line
            params=$(cat .report.params.{required,optional} 2>/dev/null)
            [[ -z "$params" ]] ||
            echo >>.report.params.sh export $params
        fi

        # TODO warn extra params

        # record parameters in JSON as well: declared parameters are passed as
        # NAME="$NAME" (evaluated after sourcing .report.params.sh, so defaults
        # apply), followed by the parameters exactly as given
        local paramsArgs=
        for name in $(cat .report.params.{required,optional} 2>/dev/null); do
            paramsArgs+=$name'="$'$name'" '
        done
        paramsArgs+='"$@"'
        # The first word after the script of `bash -c` becomes $0, not $1, so
        # an explicit placeholder is passed; otherwise the first given
        # parameter would silently be missing from "$@" in the script.
        bash -c "
            source .report.params.sh
            rm -f report.params.json
            JSON_FILE=report.params.json \\
                report-values $paramsArgs
        " bash "$@"

        # generate a report id from the required parameters
        id=$(generate-report-id "$reportName" "$reportPath" "$@")
        echo "$id" >.report.id
        # mark them read-only
        chmod -w .report.params* .report.id
    )
}
# Generates the id for a report, interpolating the values of required
# parameters next to the path components whose report.params declares them, e.g.:
# /variable(foo.is_true)/feature/sample(bar)/histogram
# Arguments: $1 - name of the report (a /-separated path)
#            $2 - path to the report instance, relative to the snapshot
# Outputs:   the id on stdout; when that id is already listed in the
#            snapshot's reports.ids, a serial number is appended
#            (see assign-unique-serial)
# NOTE(review): the report directory is resolved as
# $DEEPDIVE_APP/$SnapshotDir/$reportPath, which assumes SnapshotDir is relative
# to the app directory -- confirm.
generate-report-id() {
    local reportName=$1; shift
    local reportPath=$1; shift
    id=$(
        # traverse up the parents to produce the desired report id
        cd "$DEEPDIVE_APP/$SnapshotDir/$reportPath"
        reportNamePrefix=$reportName
        # one iteration per line printed by list-breadcrumb-paths; the line
        # itself is not used, each iteration handles the last component of
        # $reportNamePrefix in the current directory and then moves up
        list-breadcrumb-paths "$reportName" |
        while read; do
            name=${reportNamePrefix##*/}
            # name
            echo -n "$name"
            # and required parameters
            if [[ -e report.params ]]; then
                # build a comma-separated list of quoted references such as
                # "${a}","${b}" from the names declared with `required`
                argsExpr=$(
                    (
                        required() { echo "${1%:*}"; }
                        optional() { :; }
                        source report.params
                    ) |
                    while read param; do
                        echo -n '"${'"$param"'}",'
                    done
                )
                argsExpr=${argsExpr%,}
                # evaluate the references against the recorded bindings and
                # print the values in parentheses right after the name
                [[ -z "$argsExpr" ]] || printf '(%s)' "$(
                        source .report.params.sh
                        eval "printf '%s' $argsExpr"
                    )"
            fi
            echo
            cd ..
            reportNamePrefix=${reportNamePrefix%/*}
        done |
        # reverse and assemble as a path
        tac | tr '\n' '/'
    )
    id=${id%/}
    # make sure the report id is not used already, or append a unique serial number
    id-is-available() {
        ! grep -qxF "$1" "$DEEPDIVE_APP/$SnapshotDir"/reports.ids
    }
    assign-unique-serial "$id" id-is-available
}
# Produces a single report inside its instance directory.
# Arguments: $1 - path to the report instance, relative to the snapshot
#            $2 - name of the report
# Returns:   non-zero when the directory cannot be entered, when the recorded
#            parameters cannot be loaded, or when report.sh fails. The
#            failures are propagated explicitly: the caller invokes this
#            function as an `if` condition, where `set -e` is ignored, so
#            without the checks a failed report.sh would still be marked as
#            done and the function would report success.
produce-report() {
    local reportPath=$1; shift
    local reportName=$1; shift
    # run report.sh under an isolated environment
    (
        cd "$reportPath" || exit
        # load parameters
        source .report.params.sh || exit
        # execute report.sh and mark it as done afterwards
        if [[ -x report.sh ]]; then
            # passing full path to the script as argv[0] for better error messages
            (exec -a "$SnapshotDir/$reportPath/report.sh" ./report.sh) || exit
            # reached only on success: dropping the executable bit marks it as done
            chmod -x report.sh
        elif [[ -e report.sh ]]; then
            # warn about executable bit
            error "$SnapshotDir/$reportPath/report.sh: skipped because not executable" || true
        elif ls *.in &>/dev/null; then
            # compile executable documents (*.in) under the same environment
            (
                # move up to the app to show better error messages
                cd "$DEEPDIVE_APP"
                compile-xdocs "$SnapshotDir/$reportPath"/*.in
            )
        fi
    )
}
# Produces every report found in an instance, in the order given by
# list-all-reports: records the parameter bindings, produces the report, and
# registers it in the snapshot when production succeeded.
# Arguments: $1 - base template name
#            $2 - base report name
#            $3 - path to the instance
#            $4... - named parameters (NAME=VALUE) handed to every step
produce-nested-reports() {
    local templateBaseName=$1 reportBaseName=$2 instance=$3
    shift 3
    list-all-reports "$instance" |
    while read report; do
        # `.` is the instance itself; nested reports extend all three names
        if [[ $report = . ]]; then
            templateName=$templateBaseName
            reportName=$reportBaseName
            reportPath=$instance
        else
            templateName=$templateBaseName/$report
            reportName=$reportBaseName/$report
            reportPath=$instance/$report
        fi
        report-log "Producing $SnapshotDir/$reportPath"
        # parameter bindings must be on record before the report runs
        record-report-params "$reportName" "$reportPath" "$@"
        # only successfully produced reports get registered
        if produce-report "$reportPath" "$reportName" "$@"; then
            register-report "$templateName" "$reportName" "$reportPath" "$@"
        fi
    done
    report-log "Produced $SnapshotDir/$instance"
}

## commands for producing the snapshot-level report
# Prepares the snapshot-level files in the current directory without touching
# existing ones: empty reports.ids and reports.paths, a reports.json holding
# an empty object, and a README.md titled after $SnapshotDir. A timestamped
# HTML comment marking the start of production is always appended to README.md.
begin-snapshot() {
    local index=
    for index in reports.ids reports.paths; do
        if [[ ! -e $index ]]; then
            : >"$index"
        fi
    done
    if [[ ! -e reports.json ]]; then
        printf '%s\n' '{}' >reports.json
    fi
    if [[ ! -e README.md ]]; then
        printf '%s\n' "# $SnapshotDir" >README.md
    fi
    printf '%s\n' "<!-- Production begins at $(date +%FT%T) -->" >>README.md
}
# Appends a timestamped HTML comment marking the end of production to the
# snapshot-level README.md in the current directory.
end-snapshot() {
    printf '%s\n' "<!-- Production ends at $(date +%FT%T) -->" >>README.md
}
# Registers a produced report in the snapshot-level index files of the
# current directory.
# Arguments: $1 - name of the template the report was instantiated from
#            $2 - name of the report
#            $3 - path to the report instance
# Effects:   appends the report's id, path and name to reports.ids,
#            reports.paths and reports.names; appends the report's README.md
#            (if any) plus a blank line to README.md; and stores a JSON object
#            for the report in reports.json through the external report-values
#            command, keyed by the report id.
register-report() {
    local templateName=$1 reportName=$2 reportPath=$3
    shift 3

    # the three index files are kept line-aligned: id, path, report name
    cat "$reportPath"/.report.id >>reports.ids
    echo "$reportPath" >>reports.paths
    echo "$reportName" >>reports.names

    # the snapshot-level README.md accumulates the README.md of every report
    if [[ -e "$reportPath"/README.md ]]; then
        { cat "$reportPath"/README.md; echo; } >>README.md
    fi

    # aggregate report.params.json and report.json in reports.json, indexed by .report.id
    JSON_FILE=reports.json \
        report-values --alternating "$(cat "$reportPath"/.report.id)" "{$(
            cd "$reportPath"
            # template, name and path of the report come first
            printf '%s' ' "template":'; json-string "$templateName"
            printf '%s' ',"report":'  ; json-string "$reportName"
            printf '%s' ',"path":'    ; json-string "$reportPath"
            # then the recorded parameters, or an empty object if unavailable
            printf '%s' ',"params":'; cat report.params.json || echo '{}'
            # and finally the values reported by the report itself, if any
            if [[ -s report.json ]]; then
                printf '%s' ',"values":'
                cat report.json
            fi
        )}"
}

## commands snapshot.conf is expected to rely on
# Starts a new section in the snapshot-level README.md by appending a
# top-level heading surrounded by blank lines.
# Arguments: $1 - title of the section
section() {
    local title=$1
    printf '\n%s\n\n' "# $title" >>README.md
}
# Instantiates the named report template under reports/ and produces it
# together with all of its nested reports.
# Arguments: $1 - name of the report template
#            $2... - named parameters (NAME=VALUE) for the report
# Returns:   non-zero, without producing anything, when the template cannot
#            be instantiated
report() {
    local templateName=$1; shift
    # Declared separately from the assignment: `local var=$(cmd)` always
    # succeeds, which would hide a failed instantiation and let production
    # continue with a bogus instance path.
    local instance
    instance=$(create-report-instance "$templateName" "reports/" "$@") || return  # keeping new instances under reports/
    report-log "Instantiated report $SnapshotDir/$instance${*:+ ($*)}"
    produce-nested-reports "$templateName" "$templateName" "$instance" "$@"
}

## additional commands snapshot.conf may contain for running tasks
# Instantiates a task template under an already produced report and produces it.
# Arguments: $1 - id of the target report, as recorded in reports.ids
#            $2 - name of the task template
#            $3... - named parameters (NAME=VALUE) for the task
# Returns:   non-zero, without producing anything, when the report cannot be
#            found or the task cannot be instantiated
report-task() {
    local reportId=$1; shift
    local taskName=$1; shift
    # find the target report: reports.ids and reports.paths are line-aligned,
    # so look for the line whose id equals the given one exactly
    local reportPath= id= path=
    while IFS=$'\t' read -r id path; do
        if [[ $id = "$reportId" ]]; then
            reportPath=$path
            break
        fi
    done < <(paste reports.ids reports.paths)
    if [[ -z $reportPath ]]; then
        # no exact match: fall back to the previous lookup, which accepts any
        # id ending with the given text; on its own that lookup cannot find
        # an id that is also the tail of another id
        reportPath=$(paste reports.{ids,paths} | grep -F "$reportId"$'\t' | cut -f2) || true
    fi
    if ! [[ -d "$reportPath" ]]; then
        error "$reportId: No such report found" || true
        return 1
    fi
    # the report name is the id without the parenthesized parameter values
    local reportName
    reportName=$(sed 's/([^()]*)//g' <<<"$reportId")
    # instantiate and run; the assignment is kept apart from `local` so that
    # a failed instantiation is not masked
    local instance
    instance=$(create-report-instance "$taskName" "$reportPath/" "$@") || return  # keeping new instances under given report
    report-log "Instantiated task $SnapshotDir/$instance${*:+ ($*)}"
    compile-task-params "$reportId" "$reportName" "$reportPath" "$taskName" "$instance"
    produce-nested-reports "$taskName" "$reportName/$taskName" "$instance" "$@"
}


## produce reports as per dashboard configuration in the snapshot
# From here on a pipeline fails when any of its stages fails, and the ERR trap
# logs the abort before the failure is reported through `error`.
set -o pipefail
trap 'report-log "Aborting due to an error"; error "Failed producing reports"' ERR
# Everything the production prints (stdout and stderr) is appended to
# reports.log in the snapshot and echoed to stderr.
{
report-log "Producing reports under $SnapshotDir/"
report-log " about DeepDive database $(find-deepdive-db-uri)..."

# use dashboard configuration to produce reports; the configuration is sourced
# in a subshell, where it can call section, report and report-task
begin-snapshot
( source "$snapshotConfigurationPath" )
end-snapshot

# reports.paths has one line per registered report
numReports=$(wc -l <reports.paths || echo 0)
report-log "Produced $numReports reports under $SnapshotDir/"
} 2>&1 | tee -a reports.log >&2
